1 Objectives

2 Pre-requisites

Load some packages:

library(tidyverse) # manipulate and visulaize data
theme_set(theme_light(base_size = 16)) # set theme for visualisation and font size
library(unmarked) # occupancy modelling
library(sf) # spatial dataviz

3 Data

We use data from Mapping and explaining wolf recolonization in France using dynamic occupancy models and opportunistic data by Louvrier et al. in Ecography available at https://onlinelibrary.wiley.com/doi/abs/10.1111/ecog.02874. Data were collected between 1994 and 2017 (23 years or primary occasion). Occasions are months December, January, February and March (4 visits or secondary occasions). A 100km\(^2\) grid was overlaid over France and detections and non-detections were collected. See paper for more details.

3.1 Read in data

First detection/non-detection data:

y <- readRDS("data/wolf_louvrier.rds")
dim(y)
## [1] 3450   92

There are > 3000 cells in row. In columns we have 23 years * 4 visits, arranged in visits within year format, that is visit 1 to visit 4 for year 1994 in the first four columns, then visit 1 to visit 4 for year 1995, and so on.

Now read in some covariates data. First the survey effort which is (approx) the number of observers per cell:

effort <- readRDS("data/effort_louvrier.rds")

The effort covariate is a yearly site-level covariate, with values that do not vary between visits within a year, but that do vary between years:

dim(effort)
## [1] 3450   23

We have some environmental covariates as well, all site-level covariates:

envcov <- readRDS("data/envcov_louvrier.rds")

The 7 covariates are specifically in column: farmland cover, altitude, distance to closest barrier, road density, forest cover, high altitude, rock cover:

colnames(envcov)
## [1] "p_agri"   "p_alti"   "p_dbarr"  "p_road"   "p_forest" "p_halt"   "p_rock"

Last, we have two additional yearly site-level covariates which capture dispersal abilities of wolves, namely the number of occupied neighboring cells at short (10km) and long (150km) distance:

# short distance
SDAC <- readRDS("data/shortd_spatialautocorr.rds")
# long distance 
LDAC <- readRDS("data/longd_spatialautocorr.rds")

3.2 Format data

Site-level covariates first:

sites.covs <- data.frame(
  forest = envcov[,"p_forest"], # forest cover
  agr = envcov[,"p_agri"], # farmland cover
  rock = envcov[,"p_rock"], # rock cover
  halt = envcov[,"p_halt"], # high altitude
  alt = envcov[,"p_alti"], # altitude
  dbarr = envcov[,"p_dbarr"], # distance to closest barrier
  acc = envcov[,"p_road"]) # road density

On rassemble les covariables dont les valeurs diffèrent selon la cellule et l’année.

year <- matrix(rep(c('01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23'),nrow(y)), 
               nrow = nrow(y), 
               ncol = 23, 
               byrow = TRUE)
head(year)
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## [1,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10"  "11"  "12"  "13"  "14" 
## [2,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10"  "11"  "12"  "13"  "14" 
## [3,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10"  "11"  "12"  "13"  "14" 
## [4,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10"  "11"  "12"  "13"  "14" 
## [5,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10"  "11"  "12"  "13"  "14" 
## [6,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10"  "11"  "12"  "13"  "14" 
##      [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23]
## [1,] "15"  "16"  "17"  "18"  "19"  "20"  "21"  "22"  "23" 
## [2,] "15"  "16"  "17"  "18"  "19"  "20"  "21"  "22"  "23" 
## [3,] "15"  "16"  "17"  "18"  "19"  "20"  "21"  "22"  "23" 
## [4,] "15"  "16"  "17"  "18"  "19"  "20"  "21"  "22"  "23" 
## [5,] "15"  "16"  "17"  "18"  "19"  "20"  "21"  "22"  "23" 
## [6,] "15"  "16"  "17"  "18"  "19"  "20"  "21"  "22"  "23"
trendyear <- matrix(rep(1:23,nrow(y)), 
                    nrow=nrow(y),
                    ncol=23, 
                    byrow=TRUE)
head(trendyear)
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## [1,]    1    2    3    4    5    6    7    8    9    10    11    12    13    14
## [2,]    1    2    3    4    5    6    7    8    9    10    11    12    13    14
## [3,]    1    2    3    4    5    6    7    8    9    10    11    12    13    14
## [4,]    1    2    3    4    5    6    7    8    9    10    11    12    13    14
## [5,]    1    2    3    4    5    6    7    8    9    10    11    12    13    14
## [6,]    1    2    3    4    5    6    7    8    9    10    11    12    13    14
##      [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23]
## [1,]    15    16    17    18    19    20    21    22    23
## [2,]    15    16    17    18    19    20    21    22    23
## [3,]    15    16    17    18    19    20    21    22    23
## [4,]    15    16    17    18    19    20    21    22    23
## [5,]    15    16    17    18    19    20    21    22    23
## [6,]    15    16    17    18    19    20    21    22    23

Yearly site-level covariates:

yearly.site.covs <- list(
  effort = effort, # effort
  year = year, # year effect
  trendyear = trendyear, # linear trend over time
  SDAC = SDAC, # short distance
  LDAC = LDAC) # long distance

On rassemble les covariables dont les valeurs diffèrent selon les occasions secondaires.

occ <- array(0,c(3450,4,23))
for (i in 1:3450){
    for (j in 1:23){
        occ[i,1:4,j] <- c('1','2','3','4') # dec, jan, feb, mar
        if (effort[i,j] == 0) occ[i,1:4,j] <- NA
                }
    }
occbis <- occ[,,1]
for (i in 2:23){
    occbis <- cbind(occbis,occ[,,i])
}
dim(occbis) # 3450 x 92 ou 92 est 4 occ x 23 ans
## [1] 3450   92
obs.covs <- list(OCC = occbis)

Organise detection/non-detection data and covariates in data.frame that can be used by unmarked:

umf <- unmarkedMultFrame(y = y, 
                         siteCovs = sites.covs,
                         yearlySiteCovs = yearly.site.covs, 
                         obsCovs = obs.covs,
                         numPrimary = 23)

Summarize data:

summary(umf)
## unmarkedFrame Object
## 
## 3450 sites
## Maximum number of observations per site: 92 
## Mean number of observations per site: 57.91 
## Number of primary survey periods: 23 
## Number of secondary survey periods: 4 
## Sites with at least one detection: 547 
## 
## Tabulation of y observations:
##      0      1      2   <NA> 
## 196086   3215    503 117596 
## 
## Site-level covariates:
##      forest               agr                rock                halt          
##  Min.   :-1.218838   Min.   :-1.50457   Min.   :-0.253118   Min.   :-0.172297  
##  1st Qu.:-0.939161   1st Qu.:-0.83057   1st Qu.:-0.253118   1st Qu.:-0.172297  
##  Median :-0.102772   Median : 0.04001   Median :-0.253118   Median :-0.172297  
##  Mean   : 0.003797   Mean   :-0.01065   Mean   : 0.006638   Mean   : 0.004287  
##  3rd Qu.: 0.754320   3rd Qu.: 0.82406   3rd Qu.:-0.253118   3rd Qu.:-0.172297  
##  Max.   : 3.087256   Max.   : 1.89463   Max.   : 8.074589   Max.   :11.368946  
##       alt               dbarr                acc           
##  Min.   :-0.99359   Min.   :-1.153377   Min.   :-1.732584  
##  1st Qu.:-0.61255   1st Qu.:-0.825046   1st Qu.:-0.682261  
##  Median :-0.33687   Median :-0.248552   Median : 0.329160  
##  Mean   : 0.01199   Mean   : 0.002765   Mean   :-0.005365  
##  3rd Qu.: 0.34066   3rd Qu.: 0.573661   3rd Qu.: 0.757069  
##  Max.   : 4.41822   Max.   : 3.898308   Max.   : 1.846292  
## 
## Observation-level covariates:
##    OCC        
##  1   : 49951  
##  2   : 49951  
##  3   : 49951  
##  4   : 49951  
##  NA's:117596  
## 
## Yearly-site-level covariates:
##      effort            year         trendyear       SDAC      
##  Min.   :0.0000   01     : 3450   Min.   : 1   Min.   :0.000  
##  1st Qu.:0.0000   02     : 3450   1st Qu.: 6   1st Qu.:0.000  
##  Median :0.1278   03     : 3450   Median :12   Median :0.000  
##  Mean   :0.6243   04     : 3450   Mean   :12   Mean   :0.209  
##  3rd Qu.:0.7983   05     : 3450   3rd Qu.:18   3rd Qu.:0.000  
##  Max.   :8.0075   06     : 3450   Max.   :23   Max.   :8.000  
##                   (Other):58650                               
##       LDAC        
##  Min.   :-1.0912  
##  1st Qu.:-0.6831  
##  Median :-0.4912  
##  Mean   : 0.0171  
##  3rd Qu.: 0.5300  
##  Max.   : 2.5113  
## 

4 Model with constant parameters, except detection which is a function of effort

Fit a model with all parameters constant,but detection which is a function of effort:

fm0 <- colext(
  psiformula = ~ 1,     # initial occupancy
  gammaformula =  ~ 1,  # colonization
  epsilonformula = ~ 1, # extinction
  pformula = ~ effort,  # detection
  data = umf, # data
  control = list(trace = 1)) # display number of iterations by blocks of 10
## initial  value 37465.552381 
## iter  10 value 12706.122901
## iter  20 value 11195.999294
## iter  30 value 10963.405378
## final  value 10963.302306 
## converged

Inspect results:

fm0
## 
## Call:
## colext(psiformula = ~1, gammaformula = ~1, epsilonformula = ~1, 
##     pformula = ~effort, data = umf, control = list(trace = 1))
## 
## Initial:
##  Estimate    SE     z  P(>|z|)
##     -6.17 0.673 -9.16 5.25e-20
## 
## Colonization:
##  Estimate     SE     z P(>|z|)
##     -4.01 0.0509 -78.6       0
## 
## Extinction:
##  Estimate     SE     z  P(>|z|)
##      -1.3 0.0653 -19.9 5.89e-88
## 
## Detection:
##             Estimate     SE     z   P(>|z|)
## (Intercept)   -2.236 0.0819 -27.3 3.70e-164
## effort         0.543 0.0276  19.7  1.37e-86
## 
## AIC: 21936.6

Model coefficients (or parameters):

coef(fm0)
##   psi(Int)   col(Int)   ext(Int)     p(Int)  p(effort) 
## -6.1658594 -4.0053675 -1.2988148 -2.2358314  0.5434858

Get parameter estimates on natural scale. Start with colonization:

backTransform(fm0, type = "col")
## Backtransformed linear combination(s) of Colonization estimate(s)
## 
##  Estimate       SE LinComb (Intercept)
##    0.0179 0.000895   -4.01           1
## 
## Transformation: logistic

Then extinction:

backTransform(fm0, type = "ext")
## Backtransformed linear combination(s) of Extinction estimate(s)
## 
##  Estimate    SE LinComb (Intercept)
##     0.214 0.011    -1.3           1
## 
## Transformation: logistic

And initial occupancy:

backTransform(fm0, type = "psi")
## Backtransformed linear combination(s) of Initial estimate(s)
## 
##  Estimate      SE LinComb (Intercept)
##    0.0021 0.00141   -6.17           1
## 
## Transformation: logistic

Finally, we get detection as a function of effort:

# grid
effort_grid <- seq(min(yearly.site.covs$effort), max(yearly.site.covs$effort), length = 100)
# predict on logit scale
logit_det <- coef(fm0)[4] + coef(fm0)[5] * effort_grid
# backtransform
det <- plogis(logit_det)
# plot
plot(x = effort_grid, 
     y = det, 
     type = 'l', 
     xlab = "effort", 
     ylab = "estimated detection probability", 
     lwd= 3)

5 Model with all covariates as in the Louvrier et al paper

Here we fit the model with all covariates as considered by Louvrier and colleagues in their paper:

fm <- colext(
  psiformula = ~ 1,                                            # initial occupancy
  gammaformula =  ~ forest + agr + halt + alt + SDAC + LDAC,  # colonization
  epsilonformula = ~ 1,                                       # extinction
  pformula = ~ effort + acc + OCC,                                # detection
  data = umf, # data
  control = list(trace = 1), # display number of iterations by blocks of 10
  se = FALSE) # do not compute SE and confidence intervals to reduce computational burden
## initial  value 37465.552381 
## iter  10 value 12721.165262
## iter  20 value 11671.497667
## iter  30 value 10755.918886
## iter  40 value 9850.393837
## iter  50 value 9803.647828
## final  value 9799.071153 
## converged

Inspect results (NA’s are produced because we did not compute SE’s when we fitted the model):

fm
## 
## Call:
## colext(psiformula = ~1, gammaformula = ~forest + agr + halt + 
##     alt + SDAC + LDAC, epsilonformula = ~1, pformula = ~effort + 
##     acc + OCC, data = umf, se = FALSE, control = list(trace = 1))
## 
## Initial:
##  Estimate SE  z P(>|z|)
##     -6.28 NA NA      NA
## 
## Colonization:
##             Estimate SE  z P(>|z|)
## (Intercept)   -5.166 NA NA      NA
## forest         0.929 NA NA      NA
## agr            0.768 NA NA      NA
## halt          -0.144 NA NA      NA
## alt            0.834 NA NA      NA
## SDAC           0.576 NA NA      NA
## LDAC           0.384 NA NA      NA
## 
## Extinction:
##  Estimate SE  z P(>|z|)
##     -1.11 NA NA      NA
## 
## Detection:
##             Estimate SE  z P(>|z|)
## (Intercept)   -2.497 NA NA      NA
## effort         0.393 NA NA      NA
## acc           -0.842 NA NA      NA
## OCC2           0.495 NA NA      NA
## OCC3           0.436 NA NA      NA
## OCC4           0.419 NA NA      NA
## 
## AIC: 19628.14

Model coefficients are:

coef(fm)
##    psi(Int)    col(Int) col(forest)    col(agr)   col(halt)    col(alt) 
##  -6.2803098  -5.1664074   0.9291481   0.7678356  -0.1435305   0.8342459 
##   col(SDAC)   col(LDAC)    ext(Int)      p(Int)   p(effort)      p(acc) 
##   0.5761154   0.3843558  -1.1123998  -2.4965811   0.3927426  -0.8415244 
##     p(OCC2)     p(OCC3)     p(OCC4) 
##   0.4949260   0.4359181   0.4188788

Parameter estimates are given on the logit scale. You may compare them to those obtained by Louvrier and colleagues using a Bayesian approach.

6 Map occupancy

There are two ways to map annual occupancy. We may compute the occupancy probability \(\psi_{i,t}\), or we rely directly on realized occupancy, that is the latent state \(z_{i,t}\) which tells us whether cell \(i\) is occupied in year \(t\) (\(z_{i,t} = 1\)) or not (\(z_{i,t} = 0\)). We go for realized occupancy. To do so, we need the \(z\) estimates that we can get in unmarked via empirical Bayes methods: The posterior distribution of \(z\) is estimated with data and the parameter estimates we obtained from fitting our model above. The mode of the posterior distribution is the “empirical best unbiased predictor (EBUP)”.

re <- ranef(fm) # estimate posterior distribution of z
z_mode <- bup(re, stat = "mode") # get mode of posterior distribution

L’objet z_mode contient les \(z\) estimés avec en lignes les cellules de la grille et en colonnes les années. Mettons tout ça sur une carte.

Get a map of France:

france <-  st_read("shp/pays/Country.shp")
## Reading layer `Country' from data source 
##   `/Users/oliviergimenez/Dropbox/OG/GITHUB/occupancy-workshop/tutorials/shp/pays/Country.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 54 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -2177943 ymin: 5251488 xmax: 4523874 ymax: 11388590
## Projected CRS: RGF93 v1 / Lambert-93

then our 10x10km grid:

grid_rect <- st_read("shp/grille10par10/grille_France.shp") %>% 
  st_transform(crs = st_crs(france))
## Reading layer `grille_France' from data source 
##   `/Users/oliviergimenez/Dropbox/OG/GITHUB/occupancy-workshop/tutorials/shp/grille10par10/grille_France.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 5160 features and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 510000 ymin: 6110000 xmax: 1110000 ymax: 6970000
## Projected CRS: RGF93 v1 / Lambert-93

On récupère les coordonnées de nos cellules échantillonnées au moins une fois au cours de l’étude.

coord <- readRDS("data/coordcells_wolf.rds")
grid <- coord %>% 
  st_as_sf(coords = c('X','Y'), crs = st_crs(grid_rect))

L’objet grid ainsi créé est un objet spatial sf de type POINT, il me faudrait plutôt des POLYGONS pour les représenter en couleur sur la grille.

grid_poly <- st_join(grid_rect, grid, join = st_covers)
grid_poly <- grid_poly[!is.na(grid_poly$id),]

Enfin, une carte ! On prend l’année 2017 la plus récente dans le jeu de données.

grid_rect %>% 
  ggplot() + 
  geom_sf(alpha = 0, lwd = 0.01) + 
  geom_sf(data = grid_poly, aes(fill = as_factor(z_mode[,23])), 
          lwd = 0.01) + 
  geom_sf(data = france %>% filter(NAME == "France"), alpha = 0) +
  scale_fill_manual(name = "",
                    values = c("gray90",
                               "steelblue1"),
                    labels = c("cellule non-occupée", 
                               "cellule occupée")) +
  labs(title = "Carte de l'occupancy du loup en 2017",
       subtitle = "Données réseau loup")

Construisons une fonction qui fait la carte de l’occupancy pour une année quelconque. Cette fonction prend comme argument year entre 1995 et 2017.

occ_plot <- function(year){
  if (year < 1995) stop("Le loup venait juste d'arriver, pas la peine d'en faire une carte")
  if (year > 2017) stop("Pas de données aussi récentes, va falloir attendre")
  index <- year - 1994
  grid_rect %>% 
  ggplot() + 
  geom_sf(alpha = 0, lwd = 0.01) + 
  geom_sf(data = grid_poly, aes(fill = as_factor(z_mode[,index])), 
          lwd = 0.01) + 
  geom_sf(data = france %>% filter(NAME == "France"), alpha = 0) +
  scale_fill_manual(name = "",
                    values = c("gray90",
                               "steelblue1"),
                    labels = c("non-occupée", 
                               "occupée")) +
  labs(subtitle = year) +
  theme(legend.position = "none")  
}

On fait un essai avec l’année 2016.

occ_plot(2016)

On fait 3 cartes pour les années 2000, 2010 et 2017.

library(patchwork)
plot_list <- lapply(X = c(2000, 2010, 2017), FUN = occ_plot)
plot_list[[1]] | plot_list[[2]] | plot_list[[3]]